{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "0776c4fc-2cef-4c64-b52c-8d42a72e4e13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([6.320e-03, 1.800e+01, 2.310e+00, ..., 3.969e+02, 7.880e+00,\n",
       "       1.190e+01])"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 导入需要用到的package\n",
    "import numpy as np\n",
    "import json\n",
    "# 读入训练数据\n",
    "datafile = './work/housing.data'\n",
    "data = np.fromfile(datafile, sep=' ')\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "5a38c06b-25e5-429a-8489-098707d358ce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 3.9690e+02, 4.9800e+00,\n",
       "        2.4000e+01],\n",
       "       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 3.9690e+02, 9.1400e+00,\n",
       "        2.1600e+01],\n",
       "       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 3.9283e+02, 4.0300e+00,\n",
       "        3.4700e+01],\n",
       "       ...,\n",
       "       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 5.6400e+00,\n",
       "        2.3900e+01],\n",
       "       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 3.9345e+02, 6.4800e+00,\n",
       "        2.2000e+01],\n",
       "       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 7.8800e+00,\n",
       "        1.1900e+01]])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读入之后的数据被转化成1维array，其中array的第0-13项是第一条数据，第14-27项是第二条数据，以此类推.... \n",
    "# 这里对原始数据做reshape，变成N x 14的形式\n",
    "feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE','DIS', \n",
    "                 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]\n",
    "feature_num = len(feature_names)\n",
    "data = data.reshape([data.shape[0] // feature_num, feature_num])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "caa6fdc4-311e-47d4-8275-e6f7fba9762c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(14,)\n",
      "[6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01\n",
      " 4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00 2.400e+01]\n"
     ]
    }
   ],
   "source": [
    "# 查看数据\n",
    "x = data[0]\n",
    "print(x.shape)\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "baa74f7f-0c5a-4861-9441-34dcc9db1d43",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "404\n",
      "(404, 14)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([ 24.8017,   0.    ,  18.1   ,   0.    ,   0.693 ,   5.349 ,\n",
       "        96.    ,   1.7028,  24.    , 666.    ,  20.2   , 396.9   ,\n",
       "        19.77  ,   8.3   ])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ratio = 0.8\n",
    "offset = int(data.shape[0] * ratio)\n",
    "print(offset)\n",
    "training_data = data[:offset]\n",
    "print(training_data.shape)\n",
    "training_data[offset-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "1a9f8e58-891b-4869-b594-a7f51434ad68",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
      "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n"
     ]
    }
   ],
   "source": [
    "# 计算train数据集的最大值，最小值\n",
    "maximums, minimums = \\\n",
    "                     training_data.max(axis=0), \\\n",
    "                     training_data.min(axis=0)\n",
    "print(maximums)\n",
    "print(minimums)\n",
    "# 对数据进行归一化处理\n",
    "for i in range(feature_num):\n",
    "    data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "fb75f503-c5a5-4665-9a71-684f0e9b2dc2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data():\n",
    "    # 从文件导入数据\n",
    "    datafile = './work/housing.data'\n",
    "    data = np.fromfile(datafile, sep=' ')\n",
    "\n",
    "    # 每条数据包括14项，其中前面13项是影响因素，第14项是相应的房屋价格中位数\n",
    "    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \\\n",
    "                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]\n",
    "    feature_num = len(feature_names)\n",
    "\n",
    "    # 将原始数据进行Reshape，变成[N, 14]这样的形状\n",
    "    data = data.reshape([data.shape[0] // feature_num, feature_num])\n",
    "\n",
    "    # 将原数据集拆分成训练集和测试集\n",
    "    # 这里使用80%的数据做训练，20%的数据做测试\n",
    "    # 测试集和训练集必须是没有交集的\n",
    "    ratio = 0.8\n",
    "    offset = int(data.shape[0] * ratio)\n",
    "    training_data = data[:offset]\n",
    "\n",
    "    # 计算训练集的最大值，最小值\n",
    "    maximums, minimums = training_data.max(axis=0), training_data.min(axis=0)\n",
    "\n",
    "    # 对数据进行归一化处理\n",
    "    for i in range(feature_num):\n",
    "        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])\n",
    "\n",
    "    # 训练集和测试集的划分比例\n",
    "    training_data = data[:offset]\n",
    "    test_data = data[offset:]\n",
    "    return training_data, test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "399d5b1f-655c-4c20-99d0-cd983577b72b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 获取数据\n",
    "training_data, test_data = load_data()\n",
    "x = training_data[:, :-1]\n",
    "y = training_data[:, -1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "509f27a8-e514-46ff-8683-4a164cd2d68c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.         0.18       0.07344184 0.         0.31481481 0.57750527\n",
      " 0.64160659 0.26920314 0.         0.22755741 0.28723404 1.\n",
      " 0.08967991]\n",
      "[0.42222222]\n"
     ]
    }
   ],
   "source": [
    "# 查看数据\n",
    "print(x[0])\n",
    "print(y[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "c17bee85-b2f9-4a6b-b08e-a3cb373f04a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "w = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, -0.1, -0.2, -0.3, -0.4, 0.0]\n",
    "w = np.array(w).reshape([13, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "ba827079-45d7-4864-a02a-7dd3c248fbd8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.69474855]\n"
     ]
    }
   ],
   "source": [
    "x1=x[0]\n",
    "t = np.dot(x1, w)\n",
    "print(t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "6dbc7238-de35-4010-87fd-caa79d049bde",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.49474855]\n"
     ]
    }
   ],
   "source": [
    "b = -0.2\n",
    "z = t + b\n",
    "print(z)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "fcaf502e-8103-453a-a171-b4f85606cf68",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Network(object):\n",
    "    def __init__(self, num_of_weights):\n",
    "        # 随机产生w的初始值\n",
    "        # 为了保持程序每次运行结果的一致性，\n",
    "        # 此处设置固定的随机数种子\n",
    "        np.random.seed(0)\n",
    "        self.w = np.random.randn(num_of_weights, 1)\n",
    "        self.b = 0.\n",
    "        \n",
    "    def forward(self, x):\n",
    "        z = np.dot(x, self.w) + self.b\n",
    "        return z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "95917031-e796-4dec-bd36-c9c0205a6882",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2.39362982]\n"
     ]
    }
   ],
   "source": [
    "net = Network(13)\n",
    "x1 = x[0]\n",
    "y1 = y[0]\n",
    "z = net.forward(x1)\n",
    "print(z)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
